Sample-sample correlation

# Long-format counts: every column of count_matrix except the first becomes
# a (sample, n_reads) pair, and the rows are then ordered by sample name.
# NOTE(review): the first column presumably holds the feature identifier --
# confirm against where count_matrix is read in.
df_counts <- count_matrix %>%
    tidyr::pivot_longer(
        cols = 2:ncol(count_matrix),
        names_to = "sample",
        values_to = "n_reads"
    ) %>%
    arrange(sample)
# currently disabled: attach the sample sheet to the long-format counts
#df_counts <- left_join(df_samplesheet, df_counts)

# Sample-by-sample correlation matrix: spread the sorted long table back to
# one column per sample, drop the leading (non-sample) column and hand the
# remaining columns to cor() with its default settings.
df_correlation <- cor(
    dplyr::select(
        tidyr::pivot_wider(
            df_counts,
            names_from = "sample",
            values_from = "n_reads"
        ),
        -1
    )
)

# Tile heatmap of the pairwise sample correlations.
#
# The correlation matrix is converted to long format (one row per pair of
# samples) and drawn with one tile per pair, labelled with the coefficient
# rounded to two decimals. The fill scale is fixed to [-1, 1] so that the
# colours are comparable between figures.
plot_replicate_correlation <- df_correlation %>%
    # Carry the matrix row names over as an explicit column. Rebuilding them
    # from the column names would only be correct as long as rows and
    # columns are guaranteed to be in the same order.
    dplyr::as_tibble(rownames = "sample1") %>%
    tidyr::pivot_longer(
        cols = !sample1,
        names_to = "sample2", values_to = "cor_coef"
    ) %>%
    ggplot(aes(x = sample1, y = sample2, fill = cor_coef)) +
    geom_tile() +
    geom_text(color = grey(0.4), aes(label = round(cor_coef, 2))) +
    theme_light() +
    labs(title = "", x = "", y = "") +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
    scale_fill_gradientn(
        colours = c("#E7298A", grey(0.9), "#66A61E"),
        limits = c(-1, 1)
    )

plot_replicate_correlation

# Write the figure as PDF and PNG. The output directories are created first
# because ggsave() stops with an error in non-interactive runs when the
# target directory does not exist.
for (out_file in c(
    "../plots/correlation/pdf/correlation_samples.pdf",
    "../plots/correlation/png/correlation_samples.png"
)) {
    dir.create(dirname(out_file), recursive = TRUE, showWarnings = FALSE)
    ggsave(out_file, plot = plot_replicate_correlation, width = 12, height = 12)
}

# Number of colour steps of the pheatmap colour scale; the heatmap calls
# derive their `breaks` from it to pin the scale to a fixed range, see
# https://bioinformatics.stackexchange.com/questions/22502/manually-set-range-of-colour-scale-in-pheatmap-in-r
color.divisions <- 100

# Row annotation for the clustered heatmaps, one row per sample of the
# correlation matrix (50 samples in total):
#   - the first 32 samples: condition "cont", generations 0-7,
#     replicates 1-4 within each generation
#   - the last 18 samples:  condition "LD", generations 0-5,
#     replicates 5-7 within each generation
# NOTE(review): the layout is hard-coded and assumes the rows of
# df_correlation come in exactly this order -- confirm against the sample
# names.
annotation_days <- data.frame(
    row.names = unique(row.names(df_correlation)),
    generation = as.character(c(rep(0:7, each = 4), rep(0:5, each = 3))),
    condition = rep(c("cont", "LD"), times = c(32, 18)),
    replicate = as.character(c(rep(1:4, times = 8), rep(5:7, times = 6)))
)

# Annotation colours for the clustered heatmaps, see
# https://stackoverflow.com/questions/41628450/r-pheatmap-change-annotation-colors-and-prevent-graphics-window-from-popping-up
#
# Design notes:
#   - replicate colours should make the two conditions easy to tell apart:
#     four similar colours for one condition ("HC" in the original note),
#     three similar colours for LD, and let replicate 3 of LD _pop_
#   - generations get a gradient of colours
#   - candidate palettes, e.g. Tol from
#     https://davidmathlogic.com/colorblind/#%23D81B60-%231E88E5-%23FFC107-%23004D40

# --- first attempt: plain Okabe-Ito colours --------------------------------
# The `okabe`, `okabe_gen` and `annotation_color_list` built here are
# overwritten by the second palette further down; `tol` is defined but not
# used in this section.
okabe <- c(
    "#f0e442ff", "#e69f00ff", "#d55e00ff", "#cc79a7ff",
    "#009e73ff", "#56b4e9ff", "#0072b2ff", "#aaaaaaff"
)
tol <- c(
    "#882255ff", "#aa4499ff", "#cc6677ff", "#ddcc77ff",
    "#88cceeff", "#44aa99ff", "#117733ff", "#332288ff"
)
okabe_gen <- setNames(okabe, unique(annotation_days$generation))
okabe_replic <- setNames(okabe[1:7], unique(annotation_days$replicate))
annotation_color_list <- list(
    condition = c("cont" = okabe[7], "LD" = okabe[2]),
    generation = okabe_gen,
    replicate = okabe_replic
)

# --- palette actually used by the heatmaps ----------------------------------
# One colour per generation, in order of first appearance in annotation_days.
okabe <- c(
    "#fff2ecff", "#f0e442ff", "#f5a700ff", "#9a4400ff",
    "#cc79a7ff", "#56b4e9ff", "#0072b2ff", "#003450ff"
)
okabe_gen <- setNames(okabe, unique(annotation_days$generation))

# One colour per replicate, in order of first appearance in annotation_days.
based_on_col <- setNames(
    c(
        "#009affff", "#007acbff", "#005b98ff", "#003d66ff",
        "#ff9dd5ff", "#cc84aeff", "#9a5b83ff"
    ),
    unique(annotation_days$replicate)
)

annotation_color_list <- list(
    condition = c("cont" = "#005a96ff", "LD" = "#c57ba5ff"),
    replicate = based_on_col,
    generation = okabe_gen
)

# Clustered heatmaps of the sample-sample correlation matrix, drawn once
# with and once without the coefficients printed in the cells. Both
# variants share every other setting, so they are produced by one helper.

# Draw the clustered correlation heatmap and return the pheatmap object.
#   cor_matrix        sample-by-sample correlation matrix
#   annotation        data frame with one row of annotations per sample
#   annotation_colors named list of colours for the annotation columns
#   n_colors          number of colour steps of the scale
#   display_numbers   TRUE to print the coefficients inside the cells
draw_correlation_heatmap <- function(cor_matrix, annotation, annotation_colors,
                                     n_colors, display_numbers) {
    pheatmap(
        cor_matrix,
        display_numbers = display_numbers,
        # keep the column clustering but do not draw its dendrogram
        treeheight_col = 0,
        cutree_rows = 3,
        cutree_cols = 3,
        annotation_row = annotation,
        annotation_colors = annotation_colors,
        # n_colors + 1 evenly spaced breaks pin the colour scale to [-1, 1]
        breaks = seq(-1, 1, length.out = n_colors + 1)
    )
}

# Save a heatmap to each of `files`, creating missing output directories
# first (ggsave() stops with an error in non-interactive runs when the
# target directory does not exist).
save_correlation_heatmap <- function(plot, files) {
    for (out_file in files) {
        dir.create(dirname(out_file), recursive = TRUE, showWarnings = FALSE)
        ggsave(out_file, plot = plot, width = 11.5, height = 8)
    }
}

# Variant with the correlation coefficients printed in the cells.
p <- draw_correlation_heatmap(
    df_correlation, annotation_days, annotation_color_list, color.divisions,
    display_numbers = TRUE
)
p

save_correlation_heatmap(p, c(
    "../plots/correlation/png/correlation_samples_clustering.png",
    "../plots/correlation/pdf/correlation_samples_clustering.pdf"
))

# Variant without the numbers.
p <- draw_correlation_heatmap(
    df_correlation, annotation_days, annotation_color_list, color.divisions,
    display_numbers = FALSE
)
p

save_correlation_heatmap(p, c(
    "../plots/correlation/png/correlation_samples_clustering_woNumbers.png",
    "../plots/correlation/pdf/correlation_samples_clustering_woNumbers.pdf"
))

Session Info

## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so;  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=sv_SE.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=sv_SE.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=sv_SE.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=sv_SE.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Stockholm
## tzcode source: system (glibc)
## 
## attached base packages:
##  [1] tcltk     grid      stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] pheatmap_1.0.12             ggVennDiagram_1.5.2        
##  [3] Mfuzz_2.64.0                DynDoc_1.82.0              
##  [5] widgetTools_1.82.0          e1071_1.7-14               
##  [7] edgeR_4.2.0                 limma_3.60.2               
##  [9] ComplexHeatmap_2.20.0       Heatplus_3.12.0            
## [11] ggnewscale_0.4.10           ggrepel_0.9.5              
## [13] colorblindr_0.1.0           colorspace_2.1-1           
## [15] DescTools_0.99.54           DESeq2_1.44.0              
## [17] SummarizedExperiment_1.34.0 Biobase_2.64.0             
## [19] MatrixGenerics_1.16.0       matrixStats_1.3.0          
## [21] GenomicRanges_1.56.0        GenomeInfoDb_1.40.1        
## [23] IRanges_2.38.0              S4Vectors_0.42.0           
## [25] BiocGenerics_0.50.0         lubridate_1.9.3            
## [27] forcats_1.0.0               stringr_1.5.1              
## [29] dplyr_1.1.4                 purrr_1.0.2                
## [31] readr_2.1.5                 tidyr_1.3.1                
## [33] tibble_3.2.1                tidyverse_2.0.0            
## [35] ggplot2_3.5.1              
## 
## loaded via a namespace (and not attached):
##  [1] gld_2.6.6               readxl_1.4.3            rlang_1.1.3            
##  [4] magrittr_2.0.3          clue_0.3-65             GetoptLong_1.0.5       
##  [7] compiler_4.4.1          systemfonts_1.1.0       png_0.1-8              
## [10] vctrs_0.6.5             shape_1.4.6.1           pkgconfig_2.0.3        
## [13] crayon_1.5.2            fastmap_1.2.0           XVector_0.44.0         
## [16] labeling_0.4.3          utf8_1.2.4              rmarkdown_2.27         
## [19] tzdb_0.4.0              UCSC.utils_1.0.0        ragg_1.3.2             
## [22] bit_4.0.5               xfun_0.44               zlibbioc_1.50.0        
## [25] cachem_1.1.0            jsonlite_1.8.8          highr_0.11             
## [28] DelayedArray_0.30.1     BiocParallel_1.38.0     cluster_2.1.6          
## [31] parallel_4.4.1          R6_2.5.1                bslib_0.7.0            
## [34] stringi_1.8.4           RColorBrewer_1.1-3      boot_1.3-30            
## [37] jquerylib_0.1.4         cellranger_1.1.0        Rcpp_1.0.12            
## [40] iterators_1.0.14        knitr_1.47              Matrix_1.6-5           
## [43] timechange_0.3.0        tidyselect_1.2.1        rstudioapi_0.16.0      
## [46] abind_1.4-5             yaml_2.3.8              doParallel_1.0.17      
## [49] codetools_0.2-19        lattice_0.22-5          withr_3.0.0            
## [52] evaluate_0.23           proxy_0.4-27            circlize_0.4.16        
## [55] pillar_1.9.0            tkWidgets_1.82.0        foreach_1.5.2          
## [58] generics_0.1.3          vroom_1.6.5             hms_1.1.3              
## [61] munsell_0.5.1           scales_1.3.0            rootSolve_1.8.2.4      
## [64] class_7.3-22            glue_1.7.0              lmom_3.0               
## [67] tools_4.4.1             data.table_1.15.4       locfit_1.5-9.9         
## [70] Exact_3.2               mvtnorm_1.2-5           GenomeInfoDbData_1.2.12
## [73] cli_3.6.2               textshaping_0.4.0       fansi_1.0.6            
## [76] expm_0.999-9            S4Arrays_1.4.1          gtable_0.3.5           
## [79] sass_0.4.9              digest_0.6.35           SparseArray_1.4.8      
## [82] farver_2.1.2            rjson_0.2.21            htmltools_0.5.8.1      
## [85] lifecycle_1.0.4         httr_1.4.7              statmod_1.5.0          
## [88] GlobalOptions_0.1.2     bit64_4.0.5             MASS_7.3-61